# Attach the packages that will be required for the different parts of the analysis:
# For general stuff:
library(tidyverse)
library(janitor)
library(lubridate)
library(here)
library(paletteer)
# For ts stuff:
library(tsibble)
library(fable)
library(fabletools)
library(feasts)
library(forecast)
# For spatial stuff:
library(sf)
library(tmap)
library(mapview)
Using steelhead salmon observation data, wrangle, explore, and visualize time series data for steelhead salmon passage across the Bonneville Dam (Oregon) from 1939 - 2019.
Visualize a time series plot of the original daily observation counts from 1939 - 2019. Use the monthly sums for each year to finalize a seasonal plot to explore if/how monthly passage has shifted over time. Find the sum of annual observation counts for each year and create a visualization of annual steelhead passage counts. Create a finalized plot of the annual mean observation counts of steelhead salmon for each year and determine if any trends are observed.
The data have already been filtered to only include observations of steelhead salmon. Steelhead are native rainbow trout, which migrate to the ocean as juvenile fish and return to fresh water as adults to spawn. The Bonneville Dam is located 40 miles (64 km) east of Portland, Oregon, in the Columbia River Gorge. The primary function of Bonneville Dam is to generate electrical power.
Image 1: Koenig, M. Idaho Department of Fish and Game, Brent Beller, October 17, 2019, https://idfg.idaho.gov/press/oct-14-upper-salmon-river-steelhead-fishing-report. Image 2: “Spillway, Bonneville Dam”, Bonneville Dam wikipedia, June 20, 2013, https://en.wikipedia.org/wiki/Bonneville_Dam#/media/File:Spillway,_Bonneville_Dam-2.jpg.
Data from: Columbia Basin Research: Adult fish passage (http://www.cbr.washington.edu/dart/query/adult_graph_text) Citation: Columbia River DART, Columbia Basin Research, University of Washington. (2019). Adult Passage Graphics & Text. Available from http://www.cbr.washington.edu/dart/query/adult_graph_text
# Load the fish passage data from
# 'cbr_fish_passage_bonneville_allyrs_steelhead.csv' (path is relative to the
# current working directory) into a tibble:
steelhead_passage <- readr::read_csv(
  file = "cbr_fish_passage_bonneville_allyrs_steelhead.csv"
)
# Print per-column summary statistics as a first look at the raw data:
summary(object = steelhead_passage)
## year mm-dd location parameter
## Min. :1939 Length:29646 Length:29646 Length:29646
## 1st Qu.:1959 Class :character Class :character Class :character
## Median :1979 Mode :character Mode :character Mode :character
## Mean :1979
## 3rd Qu.:1999
## Max. :2019
##
## unit datatype value
## Length:29646 Length:29646 Min. : -75.0
## Class :character Class :character 1st Qu.: 29.0
## Mode :character Mode :character Median : 95.0
## Mean : 705.8
## 3rd Qu.: 745.0
## Max. :34053.0
## NA's :5657
# Build real date columns from the separate `mm-dd` and `year` columns.
# `unite()` pastes the two columns into one string (`new_date`) while keeping
# the originals (`remove = FALSE`); that string is then parsed in
# day-month-year order.
date_steelhead <- steelhead_passage %>%
  unite(new_date, c("mm-dd", year), remove = FALSE) %>%
  mutate(
    # The argument is named `orders`; the previous `order =` only worked
    # through R's partial argument matching.
    yr_mo_day = lubridate::parse_date_time(new_date, orders = "dmy"),
    # Date-class copy of the parsed date-time (used later as the tsibble index
    # and as the x axis of the daily plot).
    day_sep = ymd(yr_mo_day),
    # Make sure the counts are numeric before any arithmetic is done on them.
    value = as.numeric(value)
  ) %>%
  # Remove rows where the date could not be parsed or the count is missing.
  # A single call replaces two back-to-back `drop_na()` calls that filtered on
  # the same information.
  drop_na(yr_mo_day, day_sep, value)
# Explore the cleaned data:
summary(date_steelhead)
## new_date year mm-dd location
## Length:23989 Min. :1939 Length:23989 Length:23989
## Class :character 1st Qu.:1957 Class :character Class :character
## Mode :character Median :1979 Mode :character Mode :character
## Mean :1979
## 3rd Qu.:2003
## Max. :2019
## parameter unit datatype
## Length:23989 Length:23989 Length:23989
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## value yr_mo_day day_sep
## Min. : -75.0 Min. :1939-01-01 00:00:00 Min. :1939-01-01
## 1st Qu.: 29.0 1st Qu.:1957-03-03 00:00:00 1st Qu.:1957-03-03
## Median : 95.0 Median :1979-10-27 00:00:00 Median :1979-10-27
## Mean : 705.8 Mean :1979-10-20 04:13:29 Mean :1979-10-20
## 3rd Qu.: 745.0 3rd Qu.:2003-07-12 00:00:00 3rd Qu.:2003-07-12
## Max. :34053.0 Max. :2019-12-15 00:00:00 Max. :2019-12-15
# Split the parsed date into calendar components so the counts can be
# explored at monthly, yearly and daily resolution. `month` is a labelled
# factor (`label = TRUE`); `year` replaces the existing `year` column with the
# value taken from the parsed date; `day` is the day of the month.
parsed_steelhead <- mutate(
  date_steelhead,
  month = lubridate::month(yr_mo_day, label = TRUE),
  year = lubridate::year(yr_mo_day),
  day = lubridate::day(yr_mo_day)
)
# Time series plot of the original daily observations: one bar per
# observation date, with bar height equal to the recorded count.
daily_steelhead <- ggplot(
  data = date_steelhead,
  mapping = aes(x = day_sep, y = value)
) +
  # Bar outline colour encodes the observation year, so the passage of time
  # is visible in the colour gradient as well as on the x axis:
  geom_col(mapping = aes(colour = year)) +
  scale_colour_continuous(name = "Year") +
  # Titles and axis labels:
  labs(
    title = "Daily Observations of Steelhead Salmon (1939 - 2019)",
    subtitle = "Time Series Data for Passage Across the Bonneville Dam (Oregon)",
    x = "Date of Observation (Daily Data)",
    y = "Count (Number of Steelhead Observations)"
  ) +
  # Base theme first, then the text styling that overrides it:
  theme_minimal() +
  theme(
    text = element_text(family = "serif"),
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(face = "bold.italic", size = 9),
    axis.title.x = element_text(face = "bold", size = 10, vjust = -1),
    axis.title.y = element_text(face = "bold", size = 10, vjust = 2),
    strip.text.x = element_text(face = "bold.italic", size = 10),
    legend.title = element_text(face = "bold.italic", size = 10)
  )
# Render the stored plot:
daily_steelhead
Figure 1: Daily observations of steelhead salmon across the Bonneville Dam in Oregon.
The daily observations plot suggests that there might be seasonal trends in steelhead salmon observations. There also seems to be a large spike in daily observation counts sometime around 2010.
# Total the daily counts within every month of every year, giving one row per
# (month, year) combination with the summed count in `value`.
season_steelhead <- parsed_steelhead %>%
  group_by(month, year) %>%
  summarise(value = sum(value)) %>%
  # `summarise()` only peels off the last grouping variable, so the result
  # would otherwise stay grouped by `month`; drop the leftover grouping so
  # later operations on this table are not silently applied per month.
  ungroup()
# Seasonal plot: monthly totals along the x axis with one line per year, so
# shifts in the timing of passage across years can be compared.
ggplot(
  data = season_steelhead,
  mapping = aes(x = month, y = value, group = year)
) +
  # Line colour encodes the year each line belongs to:
  geom_line(mapping = aes(colour = year)) +
  scale_colour_continuous(name = "Year") +
  # Titles and axis labels:
  labs(
    title = "Seasonal Observations of Steelhead Salmon (1939 - 2019)",
    subtitle = "Shifts in Monthly Passage Across the Bonneville Dam (Oregon)",
    x = "Month of Observation (Seasonal)",
    y = "Count (Number of Steelhead Observations)"
  ) +
  # Base theme first, then the text styling that overrides it:
  theme_minimal() +
  theme(
    text = element_text(family = "serif"),
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(face = "bold.italic", size = 9),
    axis.title.x = element_text(face = "bold", size = 10, vjust = -1),
    axis.title.y = element_text(face = "bold", size = 10, vjust = 2),
    strip.text.x = element_text(face = "bold.italic", size = 10),
    legend.title = element_text(face = "bold.italic", size = 10)
  )
Figure 2: Seasonal observations (per Month) of steelhead salmon passage across the Bonneville Dam in Oregon. Annual (yearly) changes in seasonal observation represented by color.
Our seasonal plot shows that, in general, there is little change in monthly steelhead salmon observation counts over the years. The exceptions to this pattern are the months of July, August, and September. August seems to have had a significantly large change in observation counts over the years.
# Total the observation counts within each year: one row per year, with the
# yearly sum stored in `value`.
annual_steelhead <- summarise(
  group_by(parsed_steelhead, year),
  value = sum(value)
)
# Bar chart of the total number of observations in each year; bar height is
# the yearly sum and the fill colour repeats the year.
ggplot(
  data = annual_steelhead,
  mapping = aes(x = year, y = value)
) +
  geom_col(mapping = aes(fill = year)) +
  scale_fill_continuous(name = "Year") +
  # Titles and axis labels:
  labs(
    title = "Annual Sum of Steelhead Salmon Observations (1939 - 2019)",
    subtitle = "Sum of Yearly Observations Across the Bonneville Dam (Oregon)",
    x = "Year of Observation (Annual)",
    y = "Count (Annual Sum Steelhead Observations)"
  ) +
  # Base theme first, then the text styling that overrides it:
  theme_minimal() +
  theme(
    text = element_text(family = "serif"),
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(face = "bold.italic", size = 9),
    axis.title.x = element_text(face = "bold", size = 10, vjust = -1),
    axis.title.y = element_text(face = "bold", size = 10, vjust = 2),
    strip.text.x = element_text(face = "bold.italic", size = 10),
    legend.title = element_text(face = "bold.italic", size = 10)
  )
Figure 3: Sum of annual steelhead salmon observations across the Bonneville Dam in Oregon. Yearly change in total annual (yearly) observations represented by color.
The overall trend in the annual sums seems to be that steelhead salmon observations are increasing. There seem to have been spikes in the early 2000s and around 2010.
# Convert the parsed data to a tsibble whose time index is the Date column
# `day_sep`:
steelhead_ts <- parsed_steelhead %>%
  as_tsibble(index = day_sep)
# Collapse the daily series to one row per calendar year: `index_by()` groups
# the index by its year (stored as `annual`), and the mean daily count within
# each year is stored as `avg_value`.
yr_annual_steelh <- steelhead_ts %>%
  index_by(annual = lubridate::year(day_sep)) %>%
  summarise(avg_value = mean(value))
# Line plot of the mean observation count for each year, overlaid with a
# `geom_smooth()` layer (dashed line plus shaded band) that summarises the
# overall trend of the annual mean.
ggplot(
  data = yr_annual_steelh,
  mapping = aes(x = annual, y = avg_value)
) +
  # Yearly means:
  geom_line(colour = "darkorange") +
  # Smoothed trend:
  geom_smooth(
    colour = "steelblue",
    fill = "steelblue",
    linetype = "dashed",
    size = 0.2,
    alpha = 0.2
  ) +
  # Titles and axis labels:
  labs(
    title = "Annual Mean of Steelhead Salmon Observations (1939 - 2019)",
    subtitle = "Average of Yearly Observations Across the Bonneville Dam (Oregon)",
    x = "Year of Observation (Annual)",
    y = "Count (Annual Mean Steelhead Observations)"
  ) +
  # Base theme first, then the text styling that overrides it:
  theme_minimal() +
  theme(
    text = element_text(family = "serif"),
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(face = "bold.italic", size = 9),
    axis.title.x = element_text(face = "bold", size = 10, vjust = -1),
    axis.title.y = element_text(face = "bold", size = 10, vjust = 2),
    strip.text.x = element_text(face = "bold.italic", size = 10),
    legend.title = element_text(face = "bold.italic", size = 10)
  )
Figure 4: Annual average of steelhead salmon observations across the Bonneville Dam in Oregon. Change in yearly mean of annual observations and the average pattern (trend) of change.
The annual mean plot shows an increasing trend in the mean yearly count of steelhead salmon observations, with a sharp spike in the early 2000s.